Merge pull request #107 from albertsun/full-json-website-agent

Website Agent Improvement

Andrew Cantino 11 年前
コミット
a58ed91ba6
共有 | 2 個のファイルを変更(83 個の追加と 35 個の削除を含む)
  1. 58 35
      app/models/agents/website_agent.rb
  2. 25 0
      spec/models/agents/website_agent_spec.rb

+ 58 - 35
app/models/agents/website_agent.rb

@@ -61,7 +61,10 @@ module Agents
61 61
     end
62 62
 
63 63
     def validate_options
64
-      errors.add(:base, "url, expected_update_period_in_days, and extract are required") unless options[:expected_update_period_in_days].present? && options[:url].present? && options[:extract].present?
64
+      errors.add(:base, "url and expected_update_period_in_days are required") unless options[:expected_update_period_in_days].present? && options[:url].present?
65
+      if !options[:extract].present? && extraction_type != "json"
66
+        errors.add(:base, "extract is required for all types except json")
67
+      end
65 68
     end
66 69
 
67 70
     def check
@@ -73,45 +76,53 @@ module Agents
73 76
       end
74 77
       request.on_success do |response|
75 78
         doc = parse(response.body)
76
-        output = {}
77
-        options[:extract].each do |name, extraction_details|
78
-          result = if extraction_type == "json"
79
-                     output[name] = Utils.values_at(doc, extraction_details[:path])
80
-                   else
81
-                     output[name] = doc.css(extraction_details[:css]).map { |node|
82
-                       if extraction_details[:attr]
83
-                         node.attr(extraction_details[:attr])
84
-                       elsif extraction_details[:text]
85
-                         node.text()
86
-                       else
87
-                         error ":attr or :text is required on HTML or XML extraction patterns"
88
-                         return
89
-                       end
90
-                     }
91
-                   end
92
-          log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
93
-        end
94 79
 
95
-        num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
80
+        if extract_full_json?
81
+          result = doc
82
+          if store_payload? result
83
+            log "Storing new result for '#{name}': #{result.inspect}"
84
+            create_event :payload => result
85
+          end
86
+        else
87
+          output = {}
88
+          options[:extract].each do |name, extraction_details|
89
+            result = if extraction_type == "json"
90
+                       output[name] = Utils.values_at(doc, extraction_details[:path])
91
+                     else
92
+                       output[name] = doc.css(extraction_details[:css]).map { |node|
93
+                         if extraction_details[:attr]
94
+                           node.attr(extraction_details[:attr])
95
+                         elsif extraction_details[:text]
96
+                           node.text()
97
+                         else
98
+                           error ":attr or :text is required on HTML or XML extraction patterns"
99
+                           return
100
+                         end
101
+                       }
102
+                     end
103
+            log "Extracting #{extraction_type} at #{extraction_details[:path] || extraction_details[:css]}: #{result}"
104
+          end
96 105
 
97
-        if num_unique_lengths.length != 1
98
-          error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
99
-          return
100
-        end
106
+          num_unique_lengths = options[:extract].keys.map { |name| output[name].length }.uniq
101 107
 
102
-        previous_payloads = events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
103
-        num_unique_lengths.first.times do |index|
104
-          result = {}
105
-          options[:extract].keys.each do |name|
106
-            result[name] = output[name][index]
107
-            if name.to_s == 'url'
108
-              result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
109
-            end
108
+          if num_unique_lengths.length != 1
109
+            error "Got an uneven number of matches for #{options[:name]}: #{options[:extract].inspect}"
110
+            return
110 111
           end
112
+      
113
+          num_unique_lengths.first.times do |index|
114
+            result = {}
115
+            options[:extract].keys.each do |name|
116
+              result[name] = output[name][index]
117
+              if name.to_s == 'url'
118
+                result[name] = URI.join(options[:url], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
119
+              end
120
+            end
111 121
 
112
-          if !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
113
-            log "Storing new result for '#{name}': #{result.inspect}"
114
-            create_event :payload => result
122
+            if store_payload? result
123
+              log "Storing new parsed result for '#{name}': #{result.inspect}"
124
+              create_event :payload => result
125
+            end
115 126
           end
116 127
         end
117 128
       end
@@ -121,6 +132,18 @@ module Agents
121 132
 
122 133
     private
123 134
 
135
+    def store_payload? result
136
+      !options[:mode] || options[:mode].to_s == "all" || (options[:mode].to_s == "on_change" && !previous_payloads.include?(result.to_json))
137
+    end
138
+
139
+    def previous_payloads
140
+      events.order("id desc").limit(UNIQUENESS_LOOK_BACK).pluck(:payload).map(&:to_json) if options[:mode].to_s == "on_change"
141
+    end
142
+
143
+    def extract_full_json?
144
+      (!options[:extract].present? && extraction_type == "json")
145
+    end
146
+
124 147
     def extraction_type
125 148
       (options[:type] || begin
126 149
         if options[:url] =~ /\.(rss|xml)$/i

+ 25 - 0
spec/models/agents/website_agent_spec.rb

@@ -155,6 +155,31 @@ describe Agents::WebsiteAgent do
155 155
         event.payload[:version].should == 2
156 156
         event.payload[:title].should == "first"
157 157
       end
158
+
159
+      it "stores the whole object if :extract is not specified" do
160
+        json = {
161
+            :response => {
162
+                :version => 2,
163
+                :title => "hello!"
164
+            }
165
+        }
166
+        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
167
+        site = {
168
+            :name => "Some JSON Response",
169
+            :expected_update_period_in_days => 2,
170
+            :type => "json",
171
+            :url => "http://json-site.com",
172
+            :mode => :on_change
173
+        }
174
+        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
175
+        checker.user = users(:bob)
176
+        checker.save!
177
+
178
+        checker.check
179
+        event = Event.last
180
+        event.payload[:response][:version].should == 2
181
+        event.payload[:response][:title].should == "hello!"
182
+      end
158 183
     end
159 184
   end
160 185
 end